In the midst of a raging national debate on migration and refugees, plenty has been written about the changing profile of undocumented migrants reaching the US-Mexico border. While traditionally, Mexican migrants have been the predominant demographic group seeking to enter the United States through the southern border, that dynamic has shifted in the last decade. Central American migrants originating in the Northern Triangle countries of El Salvador, Guatemala, and Honduras have become the predominant demographic of undocumented migrants seeking to enter the United States.

# Calculate the percent of people who've considered emigration over time

# n  = number of respondents in each year/country cell (add_tally)
# nn = number of respondents in that cell sharing the same q14 answer (add_count)
inter <- LAPOP %>%
  select(year, pais, q14) %>%
  group_by(year, pais) %>%
  add_tally() %>%
  add_count(q14) %>%
  mutate(perc = (nn / n) * 100)

# One row per year/country: the row with the lowest q14 code.
# Presumably q14 == 1 is "plans to emigrate" -- TODO confirm against the codebook.
to_graph <- inter %>%
  arrange(q14) %>%
  group_by(year, pais) %>%
  slice(1) %>%
  select(year, pais, perc, q14)

# Heatmap: survey year on the x axis, country on the y axis, tile fill and
# printed number both show the percentage computed above.

plot <- ggplot(to_graph, aes(x = as.factor(year), y = pais, fill = perc)) +
  geom_tile(aes(fill = perc)) +
  geom_text(aes(label = round(perc, 1), fontface = "bold")) +
  mario_scale_cont

labels <- labs(
  title = "Since 2012, the percentage of Honduran residents \n planning to emigrate has increased nearly 30%",
  subtitle = "Across the region, there are notable increases in plans to emigrate since 2012",
  caption = "Source: LAPOP 2010-2016",
  x = "Year",
  y = "Country",
  fill = "% planning to emigrate"
)

# Strip axis titles, grid, ticks, background and legend; the tiles carry the data.
update_theme <- theme(
  legend.position = "none",
  panel.background = element_blank(),
  panel.grid = element_blank(),
  axis.ticks = element_blank(),
  axis.title.x = element_blank(),
  axis.title.y = element_blank()
)

plot + labels + mario_theme + update_theme

# Clean up the intermediates of this section
rm(inter, to_graph, plot)

As the graph above shows, this surge in Central American migration is reflected in the percentage of citizens of Central American countries who are planning to emigrate in the next three years. Notably, in all three countries, the percentage of citizens planning to emigrate has increased considerably since 2012 – with the percentage of Hondurans planning to leave the country rising from 11% in 2012 to 40% in 2016.

Given that Honduras appears to have the highest percentage of respondents planning to emigrate in the next three years – as well as what appears to be the highest rate of change – it’s important to understand where these migrants are coming from.

# Province-level counts of q14 answers.
# n  = respondents per year/country/province, nn = respondents per q14 answer
# within that cell; perc is nn as a percentage of n.
subemigrate <- LAPOP %>%
  select(year, pais, prov, q14) %>%
  # drop the 2010, 2012 and 2014 rows
  filter(year != 2010, year != 2012, year != 2014) %>%
  # drop province codes 414-417 and 219
  filter(prov != 414, prov != 415, prov != 416, prov != 417, prov != 219) %>%
  na.omit() %>%
  group_by(year, pais, prov) %>%
  add_tally() %>%
  add_count(q14) %>%
  distinct(year, pais, prov, q14, n, nn) %>%
  mutate(perc = (nn / n) * 100)

# Keep the row with the lowest q14 code per year/country/province, then order
# provinces from highest to lowest percentage.
emigrate <- subemigrate %>%
  arrange(q14) %>%
  group_by(year, pais, prov) %>%
  slice(1) %>%
  select(year, pais, prov, perc) %>%
  arrange(desc(perc))

# Normalise province names in emigrate$prov: every line replaces values equal
# to the literal on the left with the spelling on the right. Upper-case,
# unaccented names (e.g. 'SONSONATE', 'LA UNION') become title-cased names
# with accents.
# NOTE(review): on several lines the left and right literals look identical
# here; presumably the left-hand side originally held a differently encoded
# form of the accented characters. Confirm against the raw data -- if the
# bytes really are the same, those lines are no-ops.
emigrate$prov[emigrate$prov=='Atlántida'] <- "Atlántida"
emigrate$prov[emigrate$prov=='Colón'] <- "Colón"
emigrate$prov[emigrate$prov=='Copán'] <- "Copán"
emigrate$prov[emigrate$prov=='Cortés'] <- "Cortés"
emigrate$prov[emigrate$prov=='Francisco Morazán'] <- "Francisco Morazán"
emigrate$prov[emigrate$prov=='Intibucá'] <- "Intibucá"
emigrate$prov[emigrate$prov=='Santa Bárbara'] <- "Santa Bárbara"
emigrate$prov[emigrate$prov=='SONSONATE'] <- "Sonsonate"
emigrate$prov[emigrate$prov=='LA UNION'] <- "La Unión"
emigrate$prov[emigrate$prov=='CUSCATLAN'] <- "Cuscatlán"
emigrate$prov[emigrate$prov=='CABAÑAS'] <- "Cabañas"
emigrate$prov[emigrate$prov=='Quiché'] <- "Quiché"
emigrate$prov[emigrate$prov=='SAN VICENTE'] <- "San Vicente"
emigrate$prov[emigrate$prov=='LA LIBERTAD'] <- "La Libertad"
emigrate$prov[emigrate$prov=='SANTA ANA'] <- "Santa Ana"
emigrate$prov[emigrate$prov=='Cuscatlán'] <- "Cuscatlán"
emigrate$prov[emigrate$prov=='LA PAZ'] <- "La Paz"
emigrate$prov[emigrate$prov=='AHUACHAPAN'] <- "Ahuachapán"
emigrate$prov[emigrate$prov=='SAN MIGUEL'] <- "San Miguel"
emigrate$prov[emigrate$prov=='USULUTAN'] <- "Usulután"
emigrate$prov[emigrate$prov=='MORAZAN'] <- "Morazán"
emigrate$prov[emigrate$prov=='La Unión'] <- "La Unión"
emigrate$prov[emigrate$prov=='Usulután'] <- "Usulután"
emigrate$prov[emigrate$prov=='Suchitepéquez'] <- "Suchitepéquez"
emigrate$prov[emigrate$prov=='CHALATENANGO'] <- "Chalatenango"
emigrate$prov[emigrate$prov=='Cabañas'] <- "Cabañas"
emigrate$prov[emigrate$prov=='Sacatepéquez'] <- "Sacatepéquez"
emigrate$prov[emigrate$prov=='Totonicapán'] <- "Totonicapán"
emigrate$prov[emigrate$prov=='Ahuachapán'] <- "Ahuachapán"
emigrate$prov[emigrate$prov=='Sololá'] <- "Sololá"
emigrate$prov[emigrate$prov=='Morazán'] <- "Morazán"
emigrate$prov[emigrate$prov=='Petén'] <- "Petén"
emigrate$prov[emigrate$prov=='SAN SALVADOR'] <- "San Salvador"

# Reshape to one column per survey year, grouped by province
diff <- group_by(spread(emigrate, year, perc), prov)

# The spread column is named after the year; give it a syntactic name
names(diff)[names(diff) == "2016"] <- "perc2016"

# Rows 36 through 53 are selected by position.
# NOTE(review): presumably these are the Honduran provinces -- the selection
# depends on the current row order of `diff`; confirm.
diff_use <- diff[seq(36, 53), ]

# Fuzzy-join province percentages onto the Honduras shapes, tolerating a
# string distance of 1 between NAME_1 and prov
to_map_hond <- stringdist_inner_join(
  honduras_shape,
  diff_use,
  by = c(NAME_1 = "prov"),
  max_dist = 1
)
#to_map_guate <- geo_join(guate_shape, diff_use, by_sp='NAME_1', by_df='prov', how='left')
#to_map_els <- geo_join(els_shape, diff_use, by_sp='NAME_1', by_df='prov', how='left')


# Text for the Honduras choropleth
labels <- labs(
  title = "In 2016, more residents in northeastern \n Honduran provinces planned to emigrate \n than anywhere else",
  subtitle = "In the northeastern provinces of Colón and Gracias a Dios more than 50% of \n respondents indicated plans to emigrate in the next three years",
  caption = "Source: LAPOP Honduras, 2010-2016"
)

# Map styling: no border, axis titles, axis text, grid lines, ticks or
# background; larger text for a top x axis if one is drawn.
updated_theme <- theme(
  panel.border = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.text.x = element_blank(),
  panel.grid.major.y = element_blank(),
  panel.grid.minor.y = element_blank(),
  axis.title.x = element_blank(),
  panel.grid.major.x = element_blank(),
  axis.text.x.top = element_text(size = 12),
  axis.ticks = element_blank(),
  panel.background = element_blank()
)

# Two sf layers over the same data: a plain outline, then the same shapes
# filled by the 2016 percentage.
ggplot() +
  geom_sf(data = to_map_hond) +
  geom_sf(data = to_map_hond, aes(fill = perc2016)) +
  mario_scale_cont +
  labels +
  mario_theme +
  updated_theme

In the map above, a clear pattern emerges for 2016. Provinces in the northern part of the country have higher percentages of survey respondents looking to emigrate in the next three years. In fact, of the eight provinces where more than 40% of the survey population is planning to emigrate, six are in the northern part of the country and many of them border the Atlantic coast.

Even when looking at rates of change in respondents planning to emigrate in the next three years, this regional variation is consistent, as evidenced by the graph below.

# Honduras only: q14 answer counts per year and province.
# n = respondents per year/province, nn = respondents per q14 answer in that
# cell, perc = nn as a percentage of n.
subemigrate <- hondLAPOP %>%
  select(year, prov, q14) %>%
  # drop the 2012 and 2014 rows
  filter(year != 2012, year != 2014) %>%
  # drop province codes 414-417
  filter(prov != 414, prov != 415, prov != 416, prov != 417) %>%
  na.omit() %>%
  group_by(year, prov) %>%
  add_tally() %>%
  add_count(q14) %>%
  distinct(year, prov, q14, n, nn) %>%
  mutate(perc = (nn / n) * 100)

# Row with the lowest q14 code per year/province, ordered by percentage
emigrate <- subemigrate %>%
  arrange(q14) %>%
  group_by(year, prov) %>%
  slice(1) %>%
  select(year, prov, perc) %>%
  arrange(desc(perc))



## Work around province names whose encoding does not compare cleanly:
## rewrite matching values of emigrate$prov to the spelling on the right.
## NOTE(review): as displayed, the left and right literals are identical on the
## value-matched lines; presumably the left-hand side originally carried a
## differently encoded form -- confirm against the raw data.

emigrate$prov[emigrate$prov=='Atlántida'] <- "Atlántida"
emigrate$prov[emigrate$prov=='Colón'] <- "Colón"
emigrate$prov[emigrate$prov=='Copán'] <- "Copán"
emigrate$prov[emigrate$prov=='Cortés'] <- "Cortés"
# Overwritten by row position rather than by value. NOTE(review): row 33 only
# refers to the intended province while `emigrate` keeps the row order
# produced by arrange(desc(perc)) above -- verify after any data change.
emigrate$prov[33] <- "El Paraíso"
emigrate$prov[emigrate$prov=='Francisco Morazán'] <- "Francisco Morazán"
emigrate$prov[emigrate$prov=='Intibucá'] <- "Intibucá"
# Same positional caveat as row 33.
emigrate$prov[11] <- "Islas de la Bahía"
emigrate$prov[emigrate$prov=='Santa Bárbara'] <- "Santa Bárbara"

## Plot

# Rows for the last (2016) and first (2010) year, kept for labelling
right_label <- filter(emigrate, year == 2016)

left_label <- filter(emigrate, year == 2010)

# One row per province with a column per year
diff <- group_by(spread(emigrate, year, perc), prov)

names(diff)[names(diff) == "2010"] <- "perc2010"
names(diff)[names(diff) == "2016"] <- "perc2016"

# change = 2016 percentage minus 2010 percentage
diff_use <- mutate(diff, change = perc2016 - perc2010)

# Provinces whose change exceeds 30 get highlighted in the chart
highlight <- filter(diff_use, change > 30)

labels <- labs(
  title = "Most Honduran provinces saw increases in \n respondents planning to emigrate from \n 2010 to 2016",
  subtitle = "Out of 18 provinces, three experienced a 30% or greater increase \nin respondents planning to emigrate in the next three years.",
  caption = "Source: LAPOP data 2010-2016"
)

# Largest change first
diff_use <- arrange(diff_use, desc(change))

# Lollipop chart: grey stem and dot for every province, coloured stem, dot and
# "+N" label on top for the highlighted provinces; flipped so provinces read
# down the y axis.
ggplot(diff_use, aes(x = prov, y = change)) +
  geom_segment(
    aes(x = prov, xend = prov, y = 0, yend = change),
    color = "grey",
    size = 1
  ) +
  geom_point(color = "grey", size = 4, alpha = 0.6) +
  geom_segment(
    data = highlight,
    aes(x = prov, xend = prov, y = 0, yend = change),
    color = "#df65b0"
  ) +
  geom_point(data = highlight, size = 4, color = "#67001f") +
  geom_text(
    data = highlight,
    aes(label = paste0("+", round(change, 0))),
    size = 3,
    hjust = -.35,
    color = "#67001f"
  ) +
  coord_flip() +
  labs(x = "", y = "Percent Change from 2010 to 2016") +
  labels +
  theme_minimal() +
  mario_theme +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

While the vast majority of Honduran provinces have seen increases in people planning to emigrate from 2010 to 2016, three in particular stand out: Yoro, Cortés, and Colón. From 2010 to 2016, these provinces have seen increases of more than 30% in the percentage of respondents planning to emigrate. All three lie on the Atlantic Coast.

Media outlets and researchers often cite a range of push and pull factors driving this emigration. Generally speaking, violence and lack of economic opportunity are cited as two prominent push factors; while family reunification is often cited as a pull factor.

# Honduras, 2016 rows only
hond2016 <- filter(hondLAPOP, year == 2016)

## Keep only the variables used below and drop incomplete rows, then recode
## each variable into two bins. The meaning attached to each bin is the
## original author's reading of the survey items.
touse <- hond2016 %>%
  select(q14, ur, q1, q2, idio2, it1, l1, aoj11, aoj17, aoj12, ed) %>%
  na.omit() %>%
  mutate(
    # age: [0, 35] -> 1 (35 or younger), (35, Inf] -> 2
    q2 = cut(q2, breaks = c(0, 35, Inf), include.lowest = TRUE, labels = c(1, 2)),
    # personal economic situation: [0, 2] -> 2, above 2 -> 1 (1 = worse off than before)
    idio2 = cut(idio2, breaks = c(0, 2, Inf), include.lowest = TRUE, labels = c(2, 1)),
    # neighbourhood trust item: [0, 2] -> 2, above 2 -> 1 (1 = unsafe)
    it1 = cut(it1, breaks = c(0, 2, Inf), include.lowest = TRUE, labels = c(2, 1)),
    # political leaning: [0, 5] -> 2, above 5 -> 1 (1 = right wing)
    l1 = cut(l1, breaks = c(0, 5, Inf), include.lowest = TRUE, labels = c(2, 1)),
    # confidence in the judicial system if robbed: [0, 2] -> 2, above 2 -> 1 (1 = not confident)
    aoj12 = cut(aoj12, breaks = c(0, 2, Inf), include.lowest = TRUE, labels = c(2, 1)),
    # years of education: [0, 6] -> 1 (elementary not finished), above 6 -> 2
    ed = cut(ed, breaks = c(0, 6, Inf), include.lowest = TRUE, labels = c(1, 2)),
    # ur as plain numbers; per the author, rural is 1 and urban is 2
    ur = as.numeric(ur)
  )

## Percentage of each answer within the two q14 groups, one indicator at a
## time. For every indicator: n = respondents in the q14 group (add_tally),
## nn = respondents in the q14 group sharing the same value of the indicator
## (add_count); the helper columns n and nn are dropped before the next tally.
## `percs` stays grouped by q14 throughout.
percs <- touse %>%
  group_by(q14) %>%
  add_tally() %>%
  add_count(q2) %>%
  mutate(age = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(ur) %>%
  mutate(rural = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(q1) %>%
  mutate(gender = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(idio2) %>%
  mutate(econ = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(it1) %>%
  mutate(hood = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(l1) %>%
  mutate(politics = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(aoj12) %>%
  mutate(justice = (nn / n) * 100)

percs <- percs %>%
  select(-c(n, nn)) %>%
  add_tally() %>%
  add_count(ed) %>%
  mutate(education = (nn / n) * 100)


## Pull out only those who answered 1 per question
## Each table below has one row per q14 group holding the percentage of that
## group falling in category 1 of the indicator. The rows are sorted so that
## category 1 comes first before slice(1) keeps a single row per group.

# ur is numeric, so ascending order puts ur == 1 first. Without this sort the
# kept row depended on which ur value happened to appear first in the data and
# could differ between the two q14 groups.
rural <- percs %>%
  group_by(q14) %>%
  distinct(q14, ur, rural) %>%
  arrange(ur) %>%
  slice(1) %>%
  select(q14, rural)

# q2 was cut with labels c(1, 2): ascending order puts label 1 first
age <- percs %>%
  group_by(q14) %>%
  distinct(q14, q2, age) %>%
  arrange(q2) %>%
  slice(1) %>%
  select(q14, age)

# idio2, l1, it1 and aoj12 were cut with labels c(2, 1), so label 1 is the
# last factor level: descending order puts it first
econ <- percs %>%
  group_by(q14) %>%
  distinct(q14, idio2, econ) %>%
  arrange(desc(idio2)) %>%
  slice(1) %>%
  select(q14, econ)

politics <- percs %>%
  group_by(q14) %>%
  distinct(q14, l1, politics) %>%
  arrange(desc(l1)) %>%
  slice(1) %>%
  select(q14, politics)

neighborhood <- percs %>%
  group_by(q14) %>%
  distinct(q14, it1, hood) %>%
  arrange(desc(it1)) %>%
  slice(1) %>%
  select(q14, hood)

confidence <- percs %>%
  group_by(q14) %>%
  distinct(q14, aoj12, justice) %>%
  arrange(desc(aoj12)) %>%
  slice(1) %>%
  select(q14, justice)

# ed was cut with labels c(1, 2): ascending order puts label 1 first
elementary <- percs %>%
  group_by(q14) %>%
  distinct(q14, ed, education) %>%
  arrange(ed) %>%
  slice(1) %>%
  select(q14, education)



# Join the per-indicator tables into one row per q14 group
percentages <- reduce(
  list(age, confidence, econ, elementary, neighborhood, politics, rural),
  left_join,
  by = "q14"
)

# Transpose so each indicator becomes a row and each q14 group a column
percentages <- as.data.frame(t(percentages))
colnames(percentages) <- percentages[1, ] # the first row will be the header
percentages <- percentages[-1, ]
# First column is treated as the "emigrate" group, second as "not_emigrate"
names(percentages) <- c("emigrate", "not_emigrate")
# Move the indicator names from the row names into an `answers` column
percentages <- data.frame(
  answers = rownames(percentages),
  percentages,
  check.names = FALSE
)
rownames(percentages) <- seq_len(nrow(percentages))

# Gap between the two groups, largest first
percentages <- percentages %>%
  mutate(change = emigrate - not_emigrate) %>%
  arrange(desc(change))

# Indicators where the "emigrate" group is higher ...
emigratepos <- percentages %>%
  filter(change > 0) %>%
  arrange(desc(change))

# ... and where it is lower
emigrateneg <- percentages %>%
  filter(change < 0) %>%
  arrange(desc(change))

# Order the indicator factor by ascending change
percentages$answers <- factor(
  percentages$answers,
  levels = percentages$answers[order(percentages$change)]
)

colors <- c('c1' = '#67001f', 'c2' = '#df65b0')

# Axis text keyed by indicator name (the values of `answers`), so every label
# stays attached to its own row whatever order the axis ends up in. The
# previous positional vector silently assumed a level order that the plotted
# layers (emigratepos / emigrateneg) do not carry.
answer_labels <- c(
  age = '% who are younger than 35',
  hood = '% who do not trust neighbors',
  econ = '% whose economic situation has gotten worse',
  justice = '% who dont trust the justice system',
  rural = '% who live in rural communities',
  politics = '% who self-identify as conservative',
  education = '% who did not complete elementary school'
)

# Actually plot
# Dumbbell chart: dark point = "emigrate" group, light point = "not_emigrate"
# group, joined by a segment; percentages are printed next to each point, on
# the outer side of the dumbbell.

plot <- ggplot(percentages, aes(emigrate, answers)) +
  geom_point(data = emigratepos, aes(emigrate),color = '#67001f', size =4) + 
  geom_point(data = emigrateneg, aes(emigrate), color = '#67001f', size =4) + 
  geom_point(data = emigratepos, aes(not_emigrate), color = '#df65b0', size=4) +
  geom_point(data = emigrateneg, aes(not_emigrate), color = '#df65b0', size=4) +
  geom_text(data = emigratepos, aes(label=paste0(round(emigrate, 0), '%')), color = '#67001f', hjust = -.5, size = 3,
            position = position_dodge(width = 1),
            inherit.aes = TRUE) +
  geom_text(data = emigrateneg, aes(label=paste0(round(emigrate, 0), '%')), color = '#67001f', hjust = 1.5, size = 3,
            position = position_dodge(width = 1),
            inherit.aes = TRUE) +
  geom_segment(data = emigratepos, aes(x=emigrate, xend=not_emigrate, y=answers, yend=answers), color = '#67001f', size=1) + 
  geom_segment(data = emigrateneg, aes(x=emigrate, xend=not_emigrate, y=answers, yend=answers), color = '#df65b0', size=1) +
  geom_text(data = emigratepos, aes(x = not_emigrate, y= answers, label=paste0(round(not_emigrate, 0), '%')), color = '#df65b0', hjust = 1.5, size = 3,
            position = position_dodge(width = 1)) +
  expand_limits(x=c(20,80)) +
  geom_text(data = emigrateneg, aes(x = not_emigrate, y=answers, label=paste0(round(not_emigrate, 0), '%')), color = '#df65b0', hjust = -1.25, size = 3,
            position = position_dodge(width = 1)) +
  # limits: use the change-ordered factor levels of percentages$answers, since
  # the layer data were copied before that re-levelling took place
  scale_y_discrete(limits = levels(percentages$answers),
                   labels = answer_labels) +
  # No layer maps colour inside aes(), so this scale draws no legend
  scale_color_manual(name="Hondurans", 
                     breaks=c('c1', 'c2'), 
                     values = colors,
                     labels = c('Planning to Emigrate', 'Not Planning to Emigrate'))

# Title, subtitle and caption for the dumbbell chart. The subtitle previously
# read "Surprisingy" and contained a stray "fl" before "have".
labels <- labs(title = "Hondurans planning to emigrate \n (dark purple) are more likely to \n report living in unsafe, gang-filled \n neighborhoods than those who \n don't (light purple)",
     subtitle = "Surprisingly, those who plan to emigrate are more likely to \n have finished elementary school, live in a city, be younger, \n and be less politically conservative than those who don't.",
     caption = 'Source: LAPOP 2016') 

# Strip axis titles, vertical and minor grid lines, background and ticks.
# plot.caption is blanked too, so the caption set above is not drawn.
update_themes <- 
  theme(axis.title = element_blank(),
        panel.grid.major.x = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),
        axis.ticks = element_blank(),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        plot.caption = element_blank())

plot + labels  + mario_theme + update_themes

However, as the graph above shows, it appears that in Honduras – the country that’s seen the largest increases in its citizens planning to emigrate – there is a range of factors that might be driving emigration. Honduran residents planning to emigrate tend to be younger, more liberal, and more urban than those who don’t. They also tend to cite violence as an issue in their lives more so than people planning to stay, while also citing economic concerns at a lesser rate. This indicates that violence might be the biggest push factor in Honduras currently.

Even as it appears that Honduran migrants are fleeing violence more than poverty, they face additional economic burdens as they try to make it to the United States. Often, in order to secure safe passage from the Guatemala-Mexico border to the US-Mexico border, these migrants will secure the assistance of ‘coyotes,’ members of criminal organizations who guide them on their journey.

# Mean of p17_1c per year and country, rounded to whole numbers, using only
# rows with a positive p17_1c and p17_1u equal to 3.
# NOTE(review): presumably p17_1c is the amount paid and p17_1u == 3 selects
# one currency unit -- confirm against the EMIF codebook.
tograph <- slopegraph %>%
  group_by(year, pais) %>%
  filter(p17_1c > 0, p17_1u == 3) %>%
  summarise(mean_spent = mean(p17_1c)) %>%
  mutate_if(is.numeric, round, 0)

# Drop the yearly per-country tables, which are not used below
rm(
  e2014, e2015, e2016, e2017,
  h2014, h2015, h2016, h2017,
  g2014, g2015, g2016, g2017
)

# Slope graph: one line per country across years, the country name printed at
# the 2017 end and the mean amount printed at every point.

plot <- ggplot(data = tograph, aes(x = year, y = mean_spent, group = pais)) +
  geom_line(aes(color = pais, alpha = 1), size = 1) +
  mario_manual +
  geom_text_repel(
    data = filter(tograph, year == 2017),
    aes(label = pais),
    hjust = "right",
    fontface = "bold",
    size = 3.5,
    nudge_x = -0.25,
    direction = "y"
  ) +
  geom_label_repel(
    aes(label = paste0("$", mean_spent)),
    size = 3,
    label.padding = unit(0.05, "lines"),
    label.size = 0.0
  )

updated_theme <- theme(
  # no legend and no panel border
  legend.position = "none",
  panel.border = element_blank(),
  # y axis: no title, no text, no grid lines
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  panel.grid.major.y = element_blank(),
  panel.grid.minor.y = element_blank(),
  # x axis: no title, no major grid lines, larger text for a top axis
  axis.title.x = element_blank(),
  panel.grid.major.x = element_blank(),
  axis.text.x.top = element_text(size = 12),
  # no tick marks and no panel background
  axis.ticks = element_blank(),
  panel.background = element_blank()
)
#scale_x_discrete(position = "top") would move the x axis labels up top

labels <- labs(
  title = "Honduran migrants have experienced marked increases \n in the price of hiring a coyote to guide them \n through Mexico",
  subtitle = "Guatemalan migrants have experienced similar increases, but Salvadorean migrants \n have paid less for a successful trip",
  caption = "Source: EMIF Sur (2014-2017)"
)

plot + labels + mario_theme + updated_theme

rm(tograph)

As evidenced by the graph above, these coyotes are expensive – and are getting more costly over time. For migrants originating from the three Northern Triangle countries who safely made it to the US-Mexico border but got deported from the United States, the price paid to a coyote has increased significantly. Nowhere is this increase more prominent than for the Honduran migrants who are fleeing dire economic circumstances. The cost of a coyote for those Honduran migrants who reached the US-Mexico border has increased from 3942 dollars in 2014 to 6386 dollars in 2017.

Even with the cost associated with the trip, migrants face significant dangers in their journey through Mexico. As they traverse a country known for its powerful criminal organizations, associated violence, and weak protections for migrants, these migrants are often robbed, injured, or worse.

## Get data into percentages of each event
# Keep the sex, country and the thirteen p18_* columns, and only the rows in
# which every p18_* value lies strictly between 0 and 10.
# NOTE(review): presumably values outside that range are non-response codes
# and the remaining answers are 1 = yes / 2 = no -- confirm against the
# EMIF codebook.
usa_g <- usa %>% 
  select(sexo, pais, p18_1, p18_2, p18_3, p18_4, p18_5, p18_6, p18_7, p18_8, p18_9, p18_10, p18_11, p18_12, p18_13) %>%
  group_by(sexo, pais) %>%
  filter_at(vars(starts_with("p18_")), all_vars(. > 0 & . < 10)) %>%
  # Recode 2 to 0 in the p18_* columns only. The earlier whole-table
  # replacement (usa_g[usa_g == 2] <- 0) also rewrote any 2 stored in the
  # sexo and pais identifier columns.
  mutate_at(vars(starts_with("p18_")), function(x) replace(x, x == 2, 0))

# With answers coded 0/1, the mean of a column is the share answering 1.
# Average every column per sex and country, keep five of the events and give
# them readable names.
usa_perc <- usa_g %>%
  group_by(sexo, pais) %>%
  summarise_all(mean) %>%
  select(sexo, pais, p18_1, p18_2, p18_6, p18_8, p18_9) %>%
  rename(
    "Cold" = p18_1,
    "Lack of Food" = p18_2,
    "Lost" = p18_6,
    "Abandoned by Coyote" = p18_8,
    "Assault or Robbery" = p18_9
  )

# Long format: one row per sex, country and event; shares become percentages
melt <- melt(usa_perc, id.vars = c("sexo", "pais"))
melt$value <- 100 * melt$value

# Nightingale graph
# Bars of the percentage per event drawn in polar coordinates, one facet per
# country.
# NOTE(review): the plotted data holds one row per sex, country and event and
# sex is not mapped here, so with stat = 'identity' the rows for each sex
# appear to be stacked within a bar -- confirm that this is intended.

plot <- ggplot(melt, aes(x=variable, y=value, fill=pais)) + geom_bar(width = 1, stat='identity') + 
  coord_polar(start = 1, direction=1, clip = 'off') +
  scale_fill_brewer(palette='RdPu')+ xlab("") + ylab("Percent facing challenge") + facet_wrap(~pais)

# Caption year range corrected from "2014-2107" to match the other EMIF Sur
# captions in this report.
labels <- labs(title='Honduran migrants are more likely \n to be robbed, while Salvadorean \n migrants face extreme cold',
subtitle='Hondurans pay more for coyotes, but are \n the only group likely to be abandoned by them or get lost',
caption='Source: EMIF Sur(2014-2017)')

plot + labels + mario_theme

In the graph above, five of the most common self-reported challenges facing migrants who made it to the United States in 2017 are shown. Notably, Honduran migrants faced the largest share of assault or robberies of all, while Salvadorean migrants faced a disproportionate share of extreme cold and lack of food. Interestingly, despite paying more for coyotes to guide them through Mexico, Honduran migrants are the only group that reported getting lost or getting abandoned by their coyote on the trip.

In addition to these challenges, there is no guarantee that Central American migrants will evade migration forces in Mexico. The likelihood of being deported while traversing Mexico has likely increased in the last three years. Mexico, working in partnership with the United States, has beefed up its efforts to deport Central American migrants since 2014.

# Keep rows whose location codes are non-negative and below the upper bounds
# given for each column.
# NOTE(review): presumably the excluded values are missing/unknown codes --
# confirm against the EMIF codebook.
clean <- alluvial %>%
  filter(p15 >= 0, p22_1l >= 0, p22_2l >= 0, p26 >= 0, p24l >= 0) %>%
  filter(
    p15 < 2000000,
    p22_1l < 900000000,
    p22_2l < 900000000,
    p26 < 300000000,
    p24l < 9000000000
  )

# Count rows per distinct route (combination of the listed columns) and keep
# the routes that occur more than twice
touse <- clean %>%
  count(deported, p15, p22_1l, p22_2l, p24l, p26, p34l) %>%
  rename(Freq = n) %>%
  filter(Freq > 2)
rm(clean)

## Replace numeric location codes with readable place names, one column at a
## time. Each line overwrites the entries equal to the code on the left with
## the name on the right; codes without a line here are left as they are.
## NOTE(review): if a column starts out numeric, the first string assignment
## turns it into character, and the later comparisons against numeric codes
## then rely on R coercing the number to text -- confirm the column types.

# p15 (renamed to `entry` at the end of this section)
touse$p15[touse$p15 == 1217001] <- 'Tecun Uman'
touse$p15[touse$p15 == 1705099] <- 'Naranjo'
touse$p15[touse$p15 == 1705016] <- 'El Ceibo'
touse$p15[touse$p15 == 1705015] <- 'Bethel'
touse$p15[touse$p15 == 1312047] <- 'La Mesilla'

# p22_1l
touse$p22_1l[touse$p22_1l == 301930001] <- 'Veracruz, VER'
touse$p22_1l[touse$p22_1l == 300390001] <- 'Coatzacoalcos, VER'
touse$p22_1l[touse$p22_1l == 270170001] <- 'Tenosique de Pino Suarez, TAB'
touse$p22_1l[touse$p22_1l == 270040001] <- 'Villahermosa, TAB'
touse$p22_1l[touse$p22_1l == 240280001] <- 'San Luis Potosi, SLP'
touse$p22_1l[touse$p22_1l == 190390001] <- 'Monterrey, NL'
touse$p22_1l[touse$p22_1l == 70650001] <- 'Palenque, CHIS'
touse$p22_1l[touse$p22_1l == 70270001] <- 'Chiapa de Corzo, CHIS'


# p24l
touse$p24l[touse$p24l == 300030001] <- 'Acayucan, VER'
touse$p24l[touse$p24l == 280320001] <- 'Reynosa, TAMPS'
touse$p24l[touse$p24l == 280270001] <- 'N. Laredo, TAMPS'
touse$p24l[touse$p24l == 270020001] <- 'Cardenas, TAB'
touse$p24l[touse$p24l == 301930001] <- 'Veracruz, VER'
touse$p24l[touse$p24l == 300390001] <- 'Coatzacoalcos, VER'
touse$p24l[touse$p24l == 270170001] <- 'Tenosique de Pino Suarez, TAB'
touse$p24l[touse$p24l == 270040001] <- 'Villahermosa, TAB'
touse$p24l[touse$p24l == 70650001] <- 'Palenque, CHIS'

# p34l: a code of 0 is labelled as having avoided deportation
touse$p34l[touse$p34l == 0] <- 'Avoided Deportation'
touse$p34l[touse$p34l == 300030001] <- 'Acayucan, VER'
touse$p34l[touse$p34l == 270020001] <- 'Cardenas, TAB'
touse$p34l[touse$p34l == 301930001] <- 'Veracruz, VER'
touse$p34l[touse$p34l == 300390001] <- 'Coatzacoalcos, VER'
touse$p34l[touse$p34l == 270170001] <- 'Tenosique de Pino Suarez, TAB'
touse$p34l[touse$p34l == 70650001] <- 'Palenque, CHIS'
touse$p34l[touse$p34l == 270040001] <- 'Villahermosa, TAB'


# p26: a code of 0 is labelled as deported before reaching that stage
touse$p26[touse$p26 == 0] <- 'Deported Prior'
touse$p26[touse$p26 == 280320001] <- 'Reynosa, TAMPS'
touse$p26[touse$p26 == 280270001] <- 'N. Laredo, TAMPS'

touse$p26 <- as.factor(touse$p26)
# NOTE(review): the result of fct_rev() is not assigned, so this call only
# prints the reversed factor (the pasted output below) and leaves touse$p26
# unchanged -- assign it back if the reversed level order is wanted.
fct_rev(touse$p26)
##  [1] Deported Prior   Deported Prior   Deported Prior   Deported Prior  
##  [5] Deported Prior   Deported Prior   Deported Prior   Deported Prior  
##  [9] Deported Prior   Deported Prior   Deported Prior   Deported Prior  
## [13] Deported Prior   Deported Prior   Deported Prior   Deported Prior  
## [17] Deported Prior   Deported Prior   Deported Prior   Deported Prior  
## [21] Deported Prior   Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS  
## [25] Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS  
## [29] Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS  
## [33] Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS   N. Laredo, TAMPS
## [37] Reynosa, TAMPS   Reynosa, TAMPS   N. Laredo, TAMPS Reynosa, TAMPS  
## [41] N. Laredo, TAMPS Reynosa, TAMPS   Reynosa, TAMPS   Reynosa, TAMPS  
## [45] Reynosa, TAMPS   Reynosa, TAMPS  
## Levels: Reynosa, TAMPS N. Laredo, TAMPS Deported Prior
touse$p24l <- as.factor(touse$p24l)

# The first axis of the alluvial plot refers to this column as `entry`
colnames(touse)[colnames(touse)=="p15"] <- "entry"


# Alluvial diagram with three axes (entry point, p22_1l, p24l); flows are
# weighted by Freq and coloured by the `deported` column.
plot <- ggplot(
  touse,
  aes(y = Freq, axis1 = entry, axis2 = p22_1l, axis3 = p24l)
) +
  scale_fill_brewer(type = "qual", palette = "Set1") +
  geom_flow(
    aes(fill = deported, alpha = entry, colour = deported),
    stat = "alluvium",
    aes.bind = TRUE,
    reverse = TRUE,
    width = 0.1
  ) +
  geom_stratum(width = 1/10, reverse = TRUE) +
  geom_text(stat = "stratum", label.strata = TRUE, reverse = TRUE) +
  scale_x_continuous(
    breaks = 1:3,
    labels = c("Mexico Port", "Mexican City Visited", "US Port/Deported From")
  )

labels <- labs(
  title = "As they journey northward, Honduran migrants who \n get deported in Mexico get stuck in cities \n that border the Gulf of Mexico",
  subtitle = "Most migrants who make it to the US enter through Reynosa",
  caption = "Source: EMIF Sur (2014-2017)"
)

# No background, y-axis title, y-axis text or ticks
updated_theme <- theme(
  axis.ticks = element_blank(),
  axis.text.y = element_blank(),
  axis.title.y = element_blank(),
  panel.background = element_blank()
)

plot + labels + mario_theme + updated_theme

rm(touse)

The graph above, which shows the paths that Honduran migrants take within Mexico, shows that there are some slight differences in the paths that deported migrants and non-deported migrants take. Namely, migrants entering Mexico through Bethel tend to all be deported, while migrants who make a stop in Chiapa de Corzo tend to all make it to the US-Mexico border. Additionally, it seems that deported migrants tend to be caught in cities either near the Guatemala-Mexico border or on the Gulf of Mexico, while migrants that manage to make it safely through those cities often wind up making it to Reynosa, where they cross into the United States.

To further highlight this point, the map below shows the states where Central American migrants get deported by Mexican forces. It’s fairly evident that most Salvadorean and Guatemalan migrants are deported in states that border the Mexico-Guatemala border, while Honduran migrants tend to get deported on the Gulf of Mexico.

# Locations accounting for more than 3 (perc) are the ones labelled on the map
highlight <- filter(geocoded, perc > 3)

#joined <- mex_shape %>%
#  stringdist_inner_join(geocoded, by=c(NAME_2='Descripción.x', NAME_1='Descripción.y'), max_dist=1)

# Simplify the Mexico shapes, then re-project them to CRS 6372
to_map <- mex_shape %>% st_simplify()
## Warning in st_simplify.sfc(st_geometry(x), preserveTopology, dTolerance):
## st_simplify does not correctly simplify longitude/latitude data, dTolerance
## needs to be in decimal degrees
to_mapa_laea <- to_map %>% st_transform(6372)

# Map
# Point map of deportation locations: every location in grey, the highlighted
# ones coloured by country and labelled, animated one country at a time.

labels <- labs(
  title = "The majority of Honduran migrants get deported \n along the Gulf of Mexico",
  subtitle = "Guatemalan and El Salvadorean migrants tend to get deported \n on the Mexico-Guatemala border",
  caption = "Source: EMIF, 2016"
)

# No border, axis titles, axis text, grid lines, ticks or background; larger
# text for a top x axis if one is drawn
updated_theme <- theme(
  panel.border = element_blank(),
  axis.title.y = element_blank(),
  axis.text.y = element_blank(),
  axis.text.x = element_blank(),
  panel.grid.major.y = element_blank(),
  panel.grid.minor.y = element_blank(),
  axis.title.x = element_blank(),
  panel.grid.major.x = element_blank(),
  axis.text.x.top = element_text(size = 12),
  axis.ticks = element_blank(),
  panel.background = element_blank()
)

p <- ggplot() +
  #geom_sf(data=to_mapa_laea) +
  geom_point(
    data = geocoded,
    aes(x = lon, y = lat, size = perc),
    color = "grey",
    show.legend = FALSE
  ) +
  geom_point(
    data = highlight,
    aes(x = lon, y = lat, size = perc, colour = pais)
  ) +
  geom_text_repel(
    data = highlight,
    aes(x = lon, y = lat, label = Descripción.x)
  ) +
  mario_theme +
  labels +
  updated_theme

# One animation state per country
p + transition_states(pais)

In addition to deportation, Central American migrants working their way through Mexico face significant dangers. For that reason, academic experts and migrants rights advocates have often called for more investment in Northern Triangle countries to reduce violence, create more economic opportunity, and incentivize potential migrants to stay home.

To that end, as the graph above shows, US aid to Honduras has emphasized investments in civil society and government since 2010. Notably, investments in these areas increased significantly in 2015, likely as a response to the unaccompanied minors crisis on the US southern border. Though, given the surge in Hondurans planning to emigrate in the next three years, it appears this aid has yet to make an impact in the country.